%load_ext autoreload
%autoreload 2
%matplotlib inline
import pandas as pd
import numpy as np
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
# Initialise plotly's notebook mode so figures can be displayed inline.
plotly.offline.init_notebook_mode(connected=True)

# Load the example data set from the working directory and preview the
# first rows.
data_path = 'plotly_example_data.csv'
ppsurv = pd.read_csv(data_path)
ppsurv.head()
| | Unnamed: 0 | iter | model_cohort | sex | level_3 | event_time | survival |
---|---|---|---|---|---|---|---|
0 | 0 | 0 | test model | female | 0 | 0.000000 | 1.000000 |
1 | 1 | 0 | test model | female | 1 | 2.615961 | 1.000000 |
2 | 2 | 0 | test model | female | 2 | 3.584694 | 0.977162 |
3 | 3 | 0 | test model | female | 3 | 4.379338 | 0.952078 |
4 | 4 | 0 | test model | female | 4 | 6.253546 | 0.945339 |
# Summarise the `survival` values for every (sex, event_time) pair: the
# median plus the bounds of the central 50% and 95% percentile intervals.
#
# The aggregations are given as a list of (column_name, function) pairs
# instead of a {column_name: function} dict. Renaming through a dict on a
# single grouped column was deprecated in pandas 0.20 and raises
# SpecificationError ("nested renamer is not supported") from pandas 1.0 on.
# The list form works on both old and new pandas and also makes the order of
# the output columns deterministic.
summary_stats = [
    ('95_lower', lambda x: np.percentile(x, 2.5)),
    ('95_upper', lambda x: np.percentile(x, 97.5)),
    ('50_lower', lambda x: np.percentile(x, 25)),
    ('50_upper', lambda x: np.percentile(x, 75)),
    ('median', lambda x: np.percentile(x, 50)),
]
ppsummary = (
    ppsurv.groupby(['sex', 'event_time'])['survival']
    .agg(summary_stats)
    .reset_index()
)

# Preview the last rows of the summary for the 'female' group.
ppsummary[ppsummary['sex'] == 'female'].tail()
| | sex | event_time | median | 50_lower | 95_lower | 95_upper | 50_upper |
---|---|---|---|---|---|---|---|
72 | female | 18.543842 | 0.338076 | 0.283789 | 0.182441 | 0.519674 | 0.405841 |
73 | female | 18.656898 | 0.331524 | 0.275271 | 0.176058 | 0.507488 | 0.398727 |
74 | female | 18.932325 | 0.320479 | 0.269188 | 0.176320 | 0.492980 | 0.384687 |
75 | female | 19.811832 | 0.306252 | 0.251404 | 0.157844 | 0.471677 | 0.367278 |
76 | female | 20.000000 | 0.284725 | 0.229034 | 0.134397 | 0.467671 | 0.344815 |
# Colours per group. The shade colours are rgba() templates whose alpha
# channel is filled in with str.format() when a band is built.
shade_colors = dict(male='rgba(0, 128, 128, {})', female='rgba(214, 12, 140, {})')
line_colors = dict(male='rgb(0, 128, 128)', female='rgb(214, 12, 140)')


def _band_trace(x, upper, lower, fillcolor, name):
    """Build a filled band between the `upper` and `lower` curves.

    The trace outline runs along `upper` from left to right and then back
    along `lower` from right to left, so `x` and `lower` are reversed for the
    return leg.

    Args:
        x: list of x values, in ascending order.
        upper: list of upper-bound y values, aligned with `x`.
        lower: list of lower-bound y values, aligned with `x`.
        fillcolor: colour string used to fill the band.
        name: legend label of the trace.

    Returns:
        A `go.Scatter` trace describing the band.
    """
    return go.Scatter(
        x=x + x[::-1],
        y=upper + lower[::-1],
        # NOTE(review): plotly also offers fill='toself' for closed outlines;
        # 'tozerox' is kept so the rendering stays as it was.
        fill='tozerox',
        fillcolor=fillcolor,
        # Fully transparent outline so only the fill is visible. Written as
        # an explicit rgba() value instead of 'transparent', and as a plain
        # dict instead of the deprecated go.Line, so newer plotly releases
        # accept it.
        line=dict(color='rgba(0, 0, 0, 0)'),
        showlegend=True,
        name=name,
    )


# Rows must be ordered by event_time within each group so every curve is
# drawn from left to right.
ppsummary.sort_values(['sex', 'event_time'], inplace=True)

data5 = []
for grp, grp_df in ppsummary.groupby('sex'):
    x = grp_df['event_time'].tolist()

    # Median curve of the group.
    my_line = go.Scatter(
        x=x,
        y=grp_df['median'].tolist(),
        line=dict(color=line_colors[grp]),
        mode='lines',
        name=grp,
    )
    # Band between the 25th and 75th percentiles (alpha 0.3).
    my_shading50 = _band_trace(
        x,
        grp_df['50_upper'].tolist(),
        grp_df['50_lower'].tolist(),
        fillcolor=shade_colors[grp].format(0.3),
        name='{} - 50% CI'.format(grp),
    )
    # Wider, lighter band between the 2.5th and 97.5th percentiles (alpha 0.1).
    my_shading95 = _band_trace(
        x,
        grp_df['95_upper'].tolist(),
        grp_df['95_lower'].tolist(),
        fillcolor=shade_colors[grp].format(0.1),
        name='{} - 95% CI'.format(grp),
    )

    # Trace order is kept as before: line, 50% band, 95% band.
    data5.append(my_line)
    data5.append(my_shading50)
    data5.append(my_shading95)
# Axis settings: the y axis uses the '.0%' tick format, the x axis only gets
# a title.
survival_axis = dict(title='Survival (%)', tickformat='.0%')
time_axis = dict(title='Days since enrollment')
layout5 = go.Layout(xaxis=time_axis, yaxis=survival_axis)

# Combine the traces with the layout and display the figure through
# plotly.plotly's iplot under the given filename.
fig5 = go.Figure(data=data5, layout=layout5)
py.iplot(fig5, filename='survivalstan/posterior-predicted-values')